*******************************************************************************
* Replication files for "Lobbyists as Gatekeepers: Theory and Evidence"
* by Alexander V. Hirsch, Karam Kang, B. Pablo Montagnes, and Hye Young You
* Journal of Politics 
*******************************************************************************

log using "replication.log", replace
*******************************************************************************
* TABLE 1: Lobbyists and Lobbying Fee: Summary Statistics
*******************************************************************************
* Lobbyist-level panels. A lobbyist is identified by (last name, first name,
* registrant). Contact records from the 110th-111th Congresses are reduced to
* one row per lobbyist and then combined with lobbyist_attributes.dta.
local lob   "lob_lastname lob_firstname registrantid"
local stats "numcont numdcont nummem numdmem mfara mnumlob numclient numdclient"

use contacts.dta, clear
keep if lob_lastname != "" & inrange(congress, 110, 111)
drop lob_lastname1-lob_firstname4
merge m:1 registrantid fyear fmonth using registrant_info.dta, keep(master match) nogenerate

* Year of the contact; when the contact date yields no year, use the filing
* year if fmonth > 3 and the preceding year otherwise.
generate year = year(contactdate)
replace year = cond(fmonth > 3, fyear, fyear - 1) if year == .

* Distinct members contacted per lobbyist: all contact levels (nummem) and
* excluding rows whose contactlevel is "Staff" (numdmem).
egen mm = tag(`lob' icpsr), missing
egen nummem = total(mm), by(`lob')
egen mc = tag(`lob' icpsr contactlevel), missing
replace mc = 0 if contactlevel == "Staff"
egen numdmem = total(mc), by(`lob')

* Contact rows per lobbyist: all rows (numcont) and rows with contactlevel
* "Member" (numdcont).
generate a = (contactlevel == "Member")
bysort `lob': generate numcont = _N
egen numdcont = total(a), by(`lob')

* Registrant characteristics averaged over the lobbyist's contact rows.
egen mfara = mean(startyear), by(`lob')
egen mnumlob = mean(numlob), by(`lob')

* Distinct client countries per lobbyist-year: all contact levels (numclient)
* and excluding "Staff" rows (numdclient).
egen aa = tag(`lob' clientcountry year), missing
egen numclient = total(aa), by(`lob' year)
egen am = tag(`lob' clientcountry year contactlevel), missing
replace am = 0 if contactlevel == "Staff"
egen numdclient = total(am), by(`lob' year)

* Reduce to lobbyist-year rows, then average the yearly values per lobbyist.
keep `lob' `stats' year
order `lob' `stats' year
duplicates drop
collapse (mean) `stats', by(`lob')

* Attach lobbyist attributes; using-only lobbyists are discarded.
merge 1:1 registrantid lob_lastname lob_firstname using lobbyist_attributes.dta, keep(master match) nogenerate
recode expol (2 = 1) (. = 0)

* Lobbyist DW score: expol_dw when present, overridden by the average of the
* non-missing mem1_dw..mem3_dw scores when mem1_dw is present.
generate lob_dwmean = expol_dw
replace lob_dwmean = mem1_dw if mem1_dw != . & mem2_dw == .
replace lob_dwmean = (mem1_dw + mem2_dw)/2 if mem1_dw != . & mem2_dw != . & mem3_dw == .
replace lob_dwmean = (mem1_dw + mem2_dw + mem3_dw)/3 if mem1_dw != . & mem2_dw != . & mem3_dw != .

* Any government experience flag (not used in the tabulations below).
generate govexp = (expol == 1) | (staffer == 1) | (whitehouse == 1)

* TABLE 1 Panels on ideology and career history
tabstat lob_democrat cfscore lob_dwmean expol staffer whitehouse, stat(N mean sd min max) col(stat)
* TABLE 1 Panels on lobbying contacts to Members of Congress
tabstat numcont numdcont nummem numdmem numclient numdclient, stat(N mean sd min max) col(stat)


* -----------------------------------------------------------------------------
* TABLE 1 (continued): semi-annual activities per client.
* Builds contact counts per registrant x client country x filing period
* (fyear, fmonth), attaches them to contracts_list.dta together with registrant
* and client-country characteristics, and summarizes fees and contacts.
* Intermediate files temp.dta and clienttemp.dta are written and erased here.
* -----------------------------------------------------------------------------

* Step 1: counts of media/other, executive, and congressional contacts with an
* empty memberid, from the 110th and 111th contact files.
clear
use contacts_110th.dta
append using contacts_111th.dta
duplicates drop
gen a = 0
replace a = 1 if conagency == "Media" | conagency == "Other"
bys registrantid clientcountry fyear fmonth: egen media = sum(a)
replace a = 0
replace a = 1 if conagency == "Executive"
bys registrantid clientcountry fyear fmonth: egen exec = sum(a)
replace a = 0
replace a = 1 if conagency == "Congress" & memberid == ""
bys registrantid clientcountry fyear fmonth: egen cong_comm = sum(a)
keep registrantid clientcountry fyear fmonth media exec cong_comm 
duplicates drop
label var media "number of media/academia/other contacts"
label var exec "number of executive contacts"
label var cong_comm "number of congressional contacts: committee"
save temp.dta, replace

* Step 2: counts of contacts in contacts.dta, with lobbyist and member
* attributes attached.
clear
use contacts.dta
merge n:1 registrantid lob_lastname lob_firstname using lobbyist_attributes.dta
drop if _merge == 2
drop _merge
* NOTE(review): many-to-many merge; the pairing of rows within an
* (icpsr, congress) group depends on row order. Confirm whether
* masterdata_congress.dta is unique on these keys (then m:1 would be safer).
merge n:n icpsr congress using masterdata_congress.dta
drop if _merge == 2
drop _merge
duplicates drop
replace primary_committee = upper(primary_committee)

* Leader flag: majority/minority leader indicators, leadership titles stored
* in primary_committee, or chair == 1.
gen leader = 0
replace leader = 1 if maj_leader == 1
replace leader = 1 if min_leader == 1
replace leader = 1 if primary_committee == "SPEAKER"
replace leader = 1 if primary_committee == "MAJORITY LEADER"
replace leader = 1 if primary_committee == "MAJORITY WHIP"
replace leader = 1 if primary_committee == "MINORITY LEADER"
replace leader = 1 if primary_committee == "MINORITY WHIP"
replace leader = 1 if chair == 1
label var leader "party leaders"
gen foreign = 0
replace foreign = 1 if primary_committee == "FOREIGN AFFAIRS"
replace foreign = 1 if primary_committee == "FOREIGN RELATIONS"
label var foreign "foreign relations/affairs committee"

gen a = 1
bys registrantid clientcountry fyear fmonth: egen cong = sum(a)
label var cong "number of congressional contacts"
bys registrantid clientcountry fyear fmonth: egen cong_leader = sum(leader)
label var cong_leader "number of contacts to congressional leaders"
bys registrantid clientcountry fyear fmonth: egen cong_foreign = sum(foreign)
label var cong_foreign "number of contacts to foreign affairs/relations"

* "Close" contacts by CF score: lobbyist-politician distance strictly below
* the median distance over contact rows. Rows with missing cfdiff are never
* counted as close because missing compares as larger than any number.
gen cfdiff = abs(cfscore-polcfscore)
sum cfdiff, detail
replace a = 0
replace a = 1 if cfdiff<r(p50)
bys registrantid clientcountry fyear fmonth: egen cong_close = sum(a)
label var cong_close "number of contacts by a close lobbyist"

* Same construction using DW scores; lob_dwmean is expol_dw when present,
* overridden by the average of the non-missing mem1_dw..mem3_dw scores.
gen lob_dwmean = .
replace lob_dwmean = expol_dw if expol_dw != .
replace lob_dwmean = mem1_dw if mem1_dw != . & mem2_dw == .
replace lob_dwmean = (mem1_dw+mem2_dw)/2 if mem1_dw != . & mem2_dw != . & mem3_dw == . 
replace lob_dwmean = (mem1_dw+mem2_dw+mem3_dw)/3 if mem1_dw != . & mem2_dw != . & mem3_dw != . 
gen dwdiff = abs(dwnom1-lob_dwmean)
sum dwdiff, detail
replace a = 0
replace a = 1 if dwdiff<r(p50)
bys registrantid clientcountry fyear fmonth: egen cong_close_dw = sum(a)
label var cong_close_dw "number of contacts by a close lobbyist (DW)"

* partydiff equals 1 when democrat and lob_rep take the same 0/1 value and 0
* when they differ; contacts with partydiff == 0 are counted in cong_close_py.
gen partydiff = .
replace partydiff = 1 if democrat == 1 & lob_rep == 1
replace partydiff = 1 if democrat == 0 & lob_rep == 0
replace partydiff = 0 if democrat == 0 & lob_rep == 1
replace partydiff = 0 if democrat == 1 & lob_rep == 0
replace a = 0
replace a = 1 if partydiff == 0
bys registrantid clientcountry fyear fmonth: egen cong_close_py = sum(a)
* The label belongs to cong_close_py (labelling cong_close_dw here would
* overwrite the DW label set above).
label var cong_close_py "number of contacts by a same-party lobbyist"

* nocfinfo = 1 only when every contact row in the period lacks cfdiff.
gen ck = 0
replace ck = 1 if cfdiff ==.
bys registrantid clientcountry fyear fmonth: egen nocfinfo = min(ck)
keep registrantid clientcountry fyear fmonth cong cong_leader cong_foreign cong_close* nocfinfo
duplicates drop

* Step 3: combine both sets of counts; a period present in only one source
* gets zeros for the counts coming from the other source.
merge 1:1 registrantid clientcountry fyear fmonth using temp.dta
replace cong = 0 if _merge == 2
replace cong_leader = 0 if _merge == 2
replace cong_foreign = 0 if _merge == 2
replace cong_close = 0 if _merge == 2
replace cong_close_dw = 0 if _merge == 2
replace cong_close_py = 0 if _merge == 2
replace nocfinfo = 0 if _merge == 2
replace media = 0 if _merge == 1
replace exec = 0 if _merge == 1
replace cong_comm = 0 if _merge == 1
drop _merge
save temp.dta, replace

* Step 4: client-country characteristics (mean of pctagreeus per country).
clear
use clientcountry_yearly.dta
bys clientcountry: egen UNagree = mean(pctagreeus)
keep clientcountry UNagree polity
duplicates drop
sum UNagree, detail
sum polity,detail
save clienttemp.dta, replace

* Step 5: attach everything to the contract list.
clear
use contracts_list.dta
merge 1:1 registrantid clientcountry fyear fmonth using temp.dta
drop if _merge == 2	
drop _merge
erase temp.dta
merge n:1 registrantid fyear fmonth using registrant_info.dta
drop if _merge == 2
drop _merge
merge n:1 clientcountry using clienttemp.dta
drop if _merge == 2
drop _merge
erase clienttemp.dta
gen lfee = log(fee)
gen numlob2 = numlob*numlob
gen nhclients2 = nhclients*nhclients
replace cong = cong + cong_comm
gen insample = 1
replace insample = 0 if nocfinfo == 1
* NOTE(review): a missing polity yields autocracy = 0 here; confirm intended.
gen autocracy = 0
replace autocracy = 1 if polity<=0
gen close_auto = cong_close*autocracy
gen close_auto_dw = cong_close_dw*autocracy
gen close_auto_py = cong_close_py*autocracy
gen cong_auto = cong*autocracy
replace fee = fee/1000

* TABLE 1 panel on semi-annual activities per client
sum fee cong media exec startyear domestic numlob if fee>0 & insample == 1
	   

*******************************************************************************
* FIGURE 4 (a): Lobbying Contacts and Ideological Differences
*******************************************************************************
* Bins pairs into deciles of cfdiff and plots, separately by dem_max, the share
* of pairs with any == 1 (in percent) against the decile mean of cfdiff, with
* +/- 1.96 standard-error caps.
use main_panel_summary.dta, clear
xtile group = cfdiff, nq(10)
egen groupx = mean(cfdiff), by(group)

* Cell statistics by decile and dem_max.
egen dsmean = mean(any), by(group dem_max)
egen dsn = count(any), by(group dem_max)
egen dsse = sd(any), by(group dem_max)
replace dsse = 0 if dsse == .
replace dsse = dsse/sqrt(dsn)

* Express mean and standard error in percentage points.
replace dsmean = dsmean*100
replace dsse = dsse*100
generate upper = dsmean + 1.96*dsse
generate lower = dsmean - 1.96*dsse

twoway (rcap upper lower groupx if dem_max == 1, lcolor(black) yscale(range(0 6) ) ///
       graphregion(color(white))) ///
       (rcap upper lower groupx if dem_max ==0, lcolor(gs7)) ///
       (scatter dsmean groupx if dem_max == 1, connect(l) msymbol(circle) mcolor(black) lcolor(black)) ///
       (scatter dsmean groupx if dem_max ==0, connect(l) msymbol(diamond_hollow) mcolor(gs7) lcolor(gs5)) ///
       , xtitle("Absolute Difference in the CF Scores") ///
       ytitle("Probability of Having Any Contact (%)") ///
       legend(order(3 "Democrat" 4 "Republican") cols(2) pos(2) ring(0))

	   
*******************************************************************************
* FIGURE 4 (b): Lobbying Contacts and Ideological Differences                                    
*******************************************************************************
* Among pairs with any > 0, bins pairs into deciles of cfdiff and plots the
* decile mean of ncclients against the decile mean of cfdiff, with +/- 1.96
* standard-error caps.
clear
use main_panel_summary.dta
* NOTE(review): missing compares as larger than any number, so rows with a
* missing `any` (if there are any) also pass this filter.
keep if any > 0
xtile group = cfdiff, nq(10)
gen outcome = ncclients
bys group: egen groupx = mean(cfdiff)
bys group: egen dsmean = mean(outcome)
bys group: egen dsn = count(outcome)
bys group: egen dsse = sd(outcome)
replace dsse = 0 if dsse == .
replace dsse = dsse/sqrt(dsn)
gen upper = dsmean + 1.96*dsse
gen lower = dsmean - 1.96*dsse

* Decile composition diagnostics (not plotted below).
bys group: egen fracdiff = mean(partydiff)
gen gov = max(expol, staffer, whitehouse)
* Cap at 1 without touching missing values: max() is missing only when all
* three inputs are missing, and a bare "gov > 1" would be true for missing,
* coding lobbyists with no career information as having experience.
replace gov = 1 if gov > 1 & !missing(gov)
bys group: egen fracgov = mean(gov)

twoway (rcap upper lower groupx, lcolor(black) graphregion(color(white))) ///
	   (scatter dsmean groupx, connect(l) msymbol(circle) mcolor(black) lcolor(black)) ///
	   , xtitle("Absolute Difference in CF Scores") ytitle("Number of Clients with Contacts") legend(off)	      
	   
	   
*******************************************************************************
* TABLE 2: To Which Lobbyists Do Politicians Give Access? (Extensive Margin)                                     
*******************************************************************************
* Regressions of the contact indicators (any, dany) on three measures of
* lobbyist-politician distance, with politician fixed effects absorbed and
* standard errors clustered by politician.
clear
use main_panel_summary.dta
egen polid = group(icpsr)
egen lobid = group(lob_firstname lob_lastname registrantid)
macro define lob_char "numlob numlob2 startyear expol staffer whitehouse"

* regression results
* areg is used because absorb() is an areg option, not a regress option;
* this matches the Table 3 specifications.
eststo clear
eststo: quietly areg any cfdiff $lob_char, absorb(polid) cluster(polid)
eststo: quietly areg dany cfdiff $lob_char, absorb(polid) cluster(polid)
eststo: quietly areg any partydiff $lob_char, absorb(polid) cluster(polid)
eststo: quietly areg dany partydiff $lob_char, absorb(polid) cluster(polid)
eststo: quietly areg any dwdiff $lob_char, absorb(polid) cluster(polid)
eststo: quietly areg dany dwdiff $lob_char, absorb(polid) cluster(polid)
esttab, star(* 0.10 ** 0.05 *** 0.01) se ar2 keep(cfdiff partydiff dwdiff) b(%5.4f)

* mean dependent variables
tabstat any dany if cfdiff!=. 		// columns (1) and (2)
tabstat any dany if partydiff!=.	// columns (3) and (4)
tabstat any dany if dwdiff!=.		// columns (5) and (6)
  		   
	   	
**********************************************************************************
* TABLE 3: Which Lobbyists Do Politicians Meet More Frequently? (Intensive Margin)
**********************************************************************************
* Politician fixed-effect regressions of three outcomes on log(cfdiff), each
* estimated without and with the lobbyist career controls, on pairs that have
* contacts and non-missing career information.
use main_panel_summary.dta, clear
egen polid = group(icpsr)
egen lobid = group(lob_firstname lob_lastname registrantid)
global firm_char "numlob numlob2 startyear"
global lob_char "expol staffer whitehouse"

* Sample: pairs with contacts and with career information.
keep if any > 0 & expol != . & staffer != . & whitehouse != .

generate lnumconts = ln(numconts + 1)
generate lncclients = ln(ncclients + 1)
generate lcfdiff = ln(cfdiff)

* regression results: for each outcome, firm controls only, then firm plus
* lobbyist controls
eststo clear
foreach depvar in lncclients select_cur lnumconts {
    eststo: quietly areg `depvar' lcfdiff $firm_char, absorb(polid) cluster(polid)
    eststo: quietly areg `depvar' lcfdiff $firm_char $lob_char, absorb(polid) cluster(polid)
}
esttab, star(* 0.10 ** 0.05 *** 0.01) se ar2 keep(lcfdiff expol staffer whitehouse) b(%5.3f)

* mean dependent variables
tabstat lncclients select_cur lnumconts if any > 0 & lcfdiff!=., col(stat)


*******************************************************************************
* TABLE 4: What Determines Lobbying Fee?                                            
*******************************************************************************
* Builds contact counts and issue indicators per registrant x client country x
* filing period (fyear, fmonth), attaches them to contracts_list.dta with
* registrant and client-country characteristics, and regresses log fee on
* them with standard errors clustered by registrant.
* Intermediate files temp.dta and clienttemp.dta are written and erased here.

* Step 1: counts of media/other, executive, and congressional contacts with an
* empty memberid, plus issue indicators, from the 110th and 111th contact files.
clear
use contacts_110th.dta
append using contacts_111th.dta
duplicates drop
gen a = 0
replace a = 1 if conagency == "Media" | conagency == "Other"
bys registrantid clientcountry fyear fmonth: egen media = sum(a)
replace a = 0
replace a = 1 if conagency == "Executive"
bys registrantid clientcountry fyear fmonth: egen exec = sum(a)
replace a = 0
replace a = 1 if conagency == "Congress" & memberid == ""
bys registrantid clientcountry fyear fmonth: egen cong_comm = sum(a)
replace a = 0
replace a = 1 if issue == "BUD"
bys registrantid clientcountry fyear fmonth: egen iss_bud = max(a)
replace a = 0
replace a = 1 if issue == "SEC"
bys registrantid clientcountry fyear fmonth: egen iss_sec = max(a)
replace a = 0
replace a = 1 if issue == "TRD"
bys registrantid clientcountry fyear fmonth: egen iss_trd = max(a)
keep registrantid clientcountry fyear fmonth media exec cong_comm iss_bud iss_sec iss_trd
duplicates drop
label var media "number of media/academia/other contacts"
label var exec "number of executive contacts"
label var cong_comm "number of congressional contacts: committee"
label var iss_bud "any contracts on budget issue"
label var iss_sec "any contracts on security issue"
label var iss_trd "any contracts on trade issue"
save temp.dta, replace

* Step 2: counts of contacts in contacts.dta, with lobbyist and member
* attributes attached.
clear
use contacts.dta
merge n:1 registrantid lob_lastname lob_firstname using lobbyist_attributes.dta
drop if _merge == 2
drop _merge
* NOTE(review): many-to-many merge; the pairing of rows within an
* (icpsr, congress) group depends on row order. Confirm whether
* masterdata_congress.dta is unique on these keys (then m:1 would be safer).
merge n:n icpsr congress using masterdata_congress.dta
drop if _merge == 2
drop _merge
duplicates drop
replace primary_committee = upper(primary_committee)

* Leader flag: majority/minority leader indicators, leadership titles stored
* in primary_committee, or chair == 1.
gen leader = 0
replace leader = 1 if maj_leader == 1
replace leader = 1 if min_leader == 1
replace leader = 1 if primary_committee == "SPEAKER"
replace leader = 1 if primary_committee == "MAJORITY LEADER"
replace leader = 1 if primary_committee == "MAJORITY WHIP"
replace leader = 1 if primary_committee == "MINORITY LEADER"
replace leader = 1 if primary_committee == "MINORITY WHIP"
replace leader = 1 if chair == 1
label var leader "party leaders"
gen foreign = 0
replace foreign = 1 if primary_committee == "FOREIGN AFFAIRS"
replace foreign = 1 if primary_committee == "FOREIGN RELATIONS"
label var foreign "foreign relations/affairs committee"

gen a = 1
bys registrantid clientcountry fyear fmonth: egen cong = sum(a)
label var cong "number of congressional contacts"
bys registrantid clientcountry fyear fmonth: egen cong_leader = sum(leader)
label var cong_leader "number of contacts to congressional leaders"
bys registrantid clientcountry fyear fmonth: egen cong_foreign = sum(foreign)
label var cong_foreign "number of contacts to foreign affairs/relations"

* "Close" contacts by CF score: lobbyist-politician distance strictly below
* the median distance over contact rows. Rows with missing cfdiff are never
* counted as close because missing compares as larger than any number.
gen cfdiff = abs(cfscore-polcfscore)
sum cfdiff, detail
replace a = 0
replace a = 1 if cfdiff<r(p50)
bys registrantid clientcountry fyear fmonth: egen cong_close = sum(a)
label var cong_close "number of contacts by a close lobbyist"

* Same construction using DW scores; lob_dwmean is expol_dw when present,
* overridden by the average of the non-missing mem1_dw..mem3_dw scores.
gen lob_dwmean = .
replace lob_dwmean = expol_dw if expol_dw != .
replace lob_dwmean = mem1_dw if mem1_dw != . & mem2_dw == .
replace lob_dwmean = (mem1_dw+mem2_dw)/2 if mem1_dw != . & mem2_dw != . & mem3_dw == . 
replace lob_dwmean = (mem1_dw+mem2_dw+mem3_dw)/3 if mem1_dw != . & mem2_dw != . & mem3_dw != . 
gen dwdiff = abs(dwnom1-lob_dwmean)
sum dwdiff, detail
replace a = 0
replace a = 1 if dwdiff<r(p50)
bys registrantid clientcountry fyear fmonth: egen cong_close_dw = sum(a)
label var cong_close_dw "number of contacts by a close lobbyist (DW)"

* partydiff equals 1 when democrat and lob_rep take the same 0/1 value and 0
* when they differ; contacts with partydiff == 0 are counted in cong_close_py.
gen partydiff = .
replace partydiff = 1 if democrat == 1 & lob_rep == 1
replace partydiff = 1 if democrat == 0 & lob_rep == 0
replace partydiff = 0 if democrat == 0 & lob_rep == 1
replace partydiff = 0 if democrat == 1 & lob_rep == 0
replace a = 0
replace a = 1 if partydiff == 0
bys registrantid clientcountry fyear fmonth: egen cong_close_py = sum(a)
* The label belongs to cong_close_py (labelling cong_close_dw here would
* overwrite the DW label set above).
label var cong_close_py "number of contacts by a same-party lobbyist"

* nocfinfo = 1 only when every contact row in the period lacks cfdiff.
gen ck = 0
replace ck = 1 if cfdiff ==.
bys registrantid clientcountry fyear fmonth: egen nocfinfo = min(ck)
keep registrantid clientcountry fyear fmonth cong cong_leader cong_foreign cong_close* nocfinfo
duplicates drop

* Step 3: combine both sets of counts; a period present in only one source
* gets zeros for the variables coming from the other source.
merge 1:1 registrantid clientcountry fyear fmonth using temp.dta
replace cong = 0 if _merge == 2
replace cong_leader = 0 if _merge == 2
replace cong_foreign = 0 if _merge == 2
replace cong_close = 0 if _merge == 2
replace cong_close_dw = 0 if _merge == 2
replace cong_close_py = 0 if _merge == 2
replace nocfinfo = 0 if _merge == 2
replace media = 0 if _merge == 1
replace exec = 0 if _merge == 1
replace cong_comm = 0 if _merge == 1
replace iss_bud = 0 if _merge == 1
replace iss_sec = 0 if _merge == 1
replace iss_trd = 0 if _merge == 1
drop _merge
save temp.dta, replace

* Step 4: client-country characteristics (mean of pctagreeus per country).
clear
use clientcountry_yearly.dta
bys clientcountry: egen UNagree = mean(pctagreeus)
keep clientcountry UNagree polity
duplicates drop
sum UNagree, detail
sum polity,detail
save clienttemp.dta, replace

* Step 5: attach everything to the contract list.
clear
use contracts_list.dta
merge 1:1 registrantid clientcountry fyear fmonth using temp.dta
drop if _merge == 2	
drop _merge
erase temp.dta
merge n:1 registrantid fyear fmonth using registrant_info.dta
drop if _merge == 2
drop _merge
merge n:1 clientcountry using clienttemp.dta
drop if _merge == 2
drop _merge
erase clienttemp.dta
gen lfee = log(fee)
gen numlob2 = numlob*numlob
gen nhclients2 = nhclients*nhclients
replace cong = cong + cong_comm
gen insample = 1
replace insample = 0 if nocfinfo == 1
* NOTE(review): a missing polity yields autocracy = 0 here; confirm intended.
gen autocracy = 0
replace autocracy = 1 if polity<=0
gen close_auto = cong_close*autocracy
gen close_auto_dw = cong_close_dw*autocracy
gen close_auto_py = cong_close_py*autocracy
gen cong_auto = cong*autocracy

* Descriptive variable names for the regression table.
rename cong any
rename cong_close ideologically_aligned
rename cong_auto onbehalfof_autocracy
rename close_auto ideologically_aligned_autocracy
rename media anymedia
rename exec anyexecutive
rename domestic registeredLDA
rename startyear yearofFARAregistration
rename numlob numberoflobbyist
rename numlob2 numberoflobbyist_squared 
macro define contact_chr "any ideologically_aligned anymedia anyexecutive"
macro define contact_chr2 "onbehalfof_autocracy ideologically_aligned_autocracy autocracy"
macro define fara_chr "registeredLDA yearofFARAregistration numberoflobbyist numberoflobbyist_squared"

* regression results
eststo clear
eststo: quietly reg lfee $contact_chr if insample == 1, cluster(registrantid) 
eststo: quietly reg lfee $contact_chr $fara_chr if insample == 1 , cluster(registrantid)
eststo: quietly reg lfee $contact_chr $contact_chr2 if insample == 1, cluster(registrantid)
eststo: quietly reg lfee $contact_chr $contact_chr2 $fara_chr if insample == 1 , cluster(registrantid)
esttab, star(* 0.10 ** 0.05 *** 0.01) se ar2 b(%5.4f)



*******************************************************************************
* Appendix Table A1: Lobbying Firms Characteristics by the LDA Registration   
*******************************************************************************
* Firm (registrant) characteristics, tabulated separately for domestic == 1
* and domestic == 0. Writes temp.dta and temp_firmchar.dta as intermediates.

* Step 1: head counts of lobbyists with each career background, per registrant.
* NOTE(review): expol is summed as stored; if it can equal 2 in
* lobbyist_attributes.dta, such a lobbyist adds 2 to numexpol. Confirm intended.
clear
use lobbyist_attributes.dta
keep registrantid lob_lastname lob_firstname expol staffer whitehouse
bys registrantid: egen numexpol = sum(expol)
bys registrantid: egen numexstaff = sum(staffer)
bys registrantid: egen numexwhite = sum(whitehouse)
keep registrantid numexpol numexstaff numexwhite
duplicates drop
save temp.dta, replace 

* Step 2: registrant information and number of distinct contacted members in
* the 110th-111th Congresses.
clear
use contacts.dta
keep if congress>=110 & congress<=111
drop lob_lastname1-lob_firstname4
merge n:1 registrantid fyear fmonth using registrant_info.dta
drop if _merge == 2
drop _merge
* temp.dta holds one row per registrantid (the three sums are constant within
* registrant), so a many-to-one merge applies and no pre-sorting is needed.
merge m:1 registrantid using temp.dta
drop if _merge == 2
drop _merge
bys registrantid memberid: gen tt = _n ==1
bys registrantid: egen nummem = sum(tt)
keep registrantid registrantname startyear domestic numlob_all numexpol numexstaff numexwhite nummem 
duplicates drop
save temp_firmchar.dta, replace

* Step 3: fees and number of distinct client countries per registrant.
* NOTE(review): yearfee averages totfee over contract rows, so years with more
* rows receive more weight than years with fewer rows. Confirm intended.
clear
use contracts_list.dta
bys registrantid fyear: egen totfee = sum(fee)
bys registrantid: egen yearfee = mean(totfee)
bys registrantid clientcountry: gen tt = _n ==1
bys registrantid: egen nclient = sum(tt)
keep registrantid yearfee nclient
duplicates drop
save temp.dta, replace

* Step 4: combine; rows found in only one of the two files are retained.
clear
use temp_firmchar.dta
merge m:1 registrantid using temp.dta
drop _merge
replace yearfee = yearfee/1000
label var numexpol "number of former member of congress"
label var numexstaff "number of former congress staff"
label var numexwhite "number of executive branch experience"
label var nummem "number of contacted members"
label var yearfee "annual revenue ($K)"
label var nclient "number of government client"
rename yearfee ann_rev
rename nclient num_client
rename nummem num_contact_mem
rename startyear FARA_regis
rename numlob_all all_lobbyists
rename numexpol former_mem_cong
rename numexstaff former_cong_staff
rename numexwhite exec_branch_exp

* descriptive statistics: 
* LDA & FARA 
tabstat ann_rev num_client num_contact_mem FARA_regis all_lobbyists former_mem_cong former_cong_staff exec_branch_exp if domestic ==1, stat(N mean sd) col(stat)
* FARA Only
tabstat ann_rev num_client num_contact_mem FARA_regis all_lobbyists former_mem_cong former_cong_staff exec_branch_exp if domestic ==0, stat(N mean sd) col(stat)


*****************************************************************************
* Appendix Table A2: Report-Level Summary Statistics                        *
*****************************************************************************
* One row per registrant x filing period (fyear, fmonth), classified by the
* value of lobsource, with means of fees, clients and contacts per class.
* Writes temp.dta (erased here) and temp_lobsource.dta as intermediates.

* Step 1: contract rows and total fees per filing period.
clear
use contracts_list.dta 
* registrant_info.dta is merged many-to-one on these keys, as in the other
* sections of this file; no pre-sorting is needed with this syntax.
merge m:1 registrantid fyear fmonth using registrant_info.dta
keep if _merge == 3
drop _merge
gen a = 1
bys registrantid fyear fmonth: egen nfclients = sum(a)
bys registrantid fyear fmonth: egen totfee = sum(fee)
keep registrantid fyear fmonth nfclients totfee numlob 
label var nfclients "number of clients during the filing period"
label var totfee "total fees during the filing period"
duplicates drop

* Step 2: attach the contact records of the 110th-111th Congresses.
merge 1:n registrantid fyear fmonth using contacts.dta
keep if _merge == 3
drop _merge
keep if congress == 110 | congress == 111
save temp_lobsource.dta, replace

* Step 3: attach registrant-level startyear and domestic.
clear
use registrant_info.dta
keep registrantid startyear domestic
duplicates drop
sort registrantid
save temp.dta, replace
clear
use temp_lobsource.dta
sort registrantid 
* NOTE(review): legacy merge syntax is kept on purpose. It is only correct if
* temp.dta has one row per registrantid, i.e. startyear and domestic never
* vary within registrant in registrant_info.dta; "merge m:1" would abort if
* they do. Confirm uniqueness, then switch to m:1.
merge registrantid using temp.dta
drop if _merge ==2
save temp_lobsource.dta, replace
erase temp.dta

* Step 4: classify each filing period by source and count contacts.
clear
use temp_lobsource.dta
gen rpt = 0
replace rpt = 1 if lobsource == "report"
gen one = 0
replace one = 1 if lobsource == "one man shop" // numlob > 1 for some cases
bys registrantid fyear fmonth: egen source_rpt = max(rpt)
bys registrantid fyear fmonth: egen source_one = max(one)
* "report" takes precedence over "one man shop" within a filing period.
gen source_type = "3. not retri"
replace source_type = "2. report" if source_rpt == 1
replace source_type = "1. one-man" if source_rpt == 0 & source_one == 1
* contact attributes
bys registrantid fyear fmonth memberid: gen a = _n ==1
bys registrantid fyear fmonth: egen nummem = sum(a)
drop a
gen a = 1
bys registrantid fyear fmonth: egen numcont = sum(a)
drop a
gen a = 0
replace a = 1 if contactlevel == "Member"
bys registrantid fyear fmonth: egen numdir = sum(a)
drop a
rename totfee fee
replace fee = fee/1000
keep registrantid fyear fmonth source_type fee nfclients nummem numcont numdir startyear domestic numlob 
duplicates drop 
label var source_type "report source type"
label var nummem "number of contacted member"
label var numcont "number of total contacts"
label var numdir "number of direct contacts"

* Column 2: "Inferred, a Single Lobbyist"
tabstat nfclients fee startyear domestic numlob numcont if source_type =="1. one-man", stat(N mean) col(stat)
* Column 3: "Observed from the Report"
tabstat nfclients fee startyear domestic numlob numcont if source_type =="2. report", stat(N mean) col(stat)
* Column 4: "Not Retrieved"
tabstat nfclients fee startyear domestic numlob numcont if source_type =="3. not retri", stat(N mean) col(stat)


******************************************************************************
* Appendix Table A3: Specialization
******************************************************************************
* Lobbyist x politician pairs: regress an indicator for contacts covering both
* security and trade/budget issues on ideological distance, absorbing lobbyist
* fixed effects or politician fixed effects plus lobbyist dummies.
clear
use contacts.dta
merge m:1 registrantid lob_lastname lob_firstname using lobbyist_attributes.dta
keep if _merge == 3
drop _merge
* Many-to-many merge retained exactly; its row pairing is order-sensitive.
merge m:m icpsr congress using masterdata_congress.dta
drop if _merge == 2
drop _merge
duplicates drop
replace primary_committee = upper(primary_committee)

* Politician attributes.
generate leader = (maj_leader == 1) | (min_leader == 1) | (chair == 1) | ///
    inlist(primary_committee, "SPEAKER", "MAJORITY LEADER", "MAJORITY WHIP", ///
           "MINORITY LEADER", "MINORITY WHIP")
label variable leader "party leaders"
generate foreign = inlist(primary_committee, "FOREIGN AFFAIRS", "FOREIGN RELATIONS")
label variable foreign "foreign relations/affairs committee"

* Distance measures between politician and lobbyist.
generate cfdiff = abs(cfscore - polcfscore)
generate lob_dwmean = expol_dw
replace lob_dwmean = mem1_dw if mem1_dw != . & mem2_dw == .
replace lob_dwmean = (mem1_dw + mem2_dw)/2 if mem1_dw != . & mem2_dw != . & mem3_dw == .
replace lob_dwmean = (mem1_dw + mem2_dw + mem3_dw)/3 if mem1_dw != . & mem2_dw != . & mem3_dw != .
generate dwdiff = abs(dwnom1 - lob_dwmean)
label variable dwdiff "DWNOMINATE score difference btw politician and lobbyist"
generate partydiff = (democrat == lob_rep) if inlist(democrat, 0, 1) & inlist(lob_rep, 0, 1)
label variable partydiff "Party difference btw politician and lobbyist"

* Fold the GEN and ADM issue codes into OTH.
replace issue = "OTH" if inlist(issue, "GEN", "ADM")

* close = 1 when cfdiff is strictly below its median over contact rows,
* 0 otherwise, and missing when cfdiff is missing.
summarize cfdiff, detail
generate close = (cfdiff < r(p50)) if cfdiff != .

* Issue coverage of each lobbyist-politician pair.
local pair "registrantid lob_lastname lob_firstname icpsr"
generate a = (issue == "SEC")
egen sec_lob = max(a), by(`pair')
replace a = inlist(issue, "TRD", "BUD")
egen trd_lob = max(a), by(`pair')
bysort `pair' issue: generate ii = (_n == 1)
egen numissue = total(ii), by(`pair')

* multilob: 1 if the pair covers both issue groups, 0 if exactly one of them,
* missing if neither.
generate multilob = (sec_lob == 1 & trd_lob == 1) if sec_lob == 1 | trd_lob == 1

keep `pair' cfdiff close sec_lob trd_lob multilob numissue
duplicates drop
egen lobid = group(registrantid lob_lastname lob_firstname)

* regression results: for each distance measure, lobbyist fixed effects, then
* politician fixed effects with lobbyist dummies
eststo clear
foreach dist in cfdiff close {
    eststo: quietly areg multilob `dist', absorb(lobid) cluster(lobid)
    eststo: quietly areg multilob `dist' i.lobid, absorb(icpsr) cluster(lobid)
}
esttab, star(* 0.10 ** 0.05 *** 0.01) se r2 keep(cfdiff close) b(%5.4f)


******************************************************************************
* Appendix Figure A1: Distribution of Ideology Measures by Party Affiliation *
******************************************************************************
* One row per lobbyist (registrant x name), matched to lobbyist attributes;
* overlaid histograms of each ideology measure by lobbyist party indicator.
use main_panel_summary.dta, clear
bysort registrantid lob_lastname lob_firstname: keep if _n == 1
merge 1:1 registrantid lob_lastname lob_firstname using lobbyist_attributes.dta, keep(match) nogenerate

* Lobbyist DW score: expol_dw when present, overridden by the average of the
* non-missing mem1_dw..mem3_dw scores when mem1_dw is present.
generate lob_dwmean = expol_dw
replace lob_dwmean = mem1_dw if mem1_dw != . & mem2_dw == .
replace lob_dwmean = (mem1_dw + mem2_dw)/2 if mem1_dw != . & mem2_dw != . & mem3_dw == .
replace lob_dwmean = (mem1_dw + mem2_dw + mem3_dw)/3 if mem1_dw != . & mem2_dw != . & mem3_dw != .

* Panel (a)
twoway (hist cfscore if lob_democrat == 1, freq start(-1.25) width(0.05) lcolor(gs12) fcolor(gs12) ///
       xtitle("Lobbyist's CF Score") graphregion(color(white))) ///
       (hist cfscore if lob_republican == 1, freq start(-1.25) width(0.05) fcolor(none) lcolor(black)) ///
       , legend(order(1 "Democrat Lobbyist" 2 "Republican Lobbyist") cols(3)) ///
       xlabel(-1(0.5)1) xscale(range(-1.25 1.25))

* Panel (b)
twoway (hist lob_dwmean if lob_democrat == 1, freq start(-.60) width(0.05) lcolor(gs12) fcolor(gs12) ///
       xtitle("Lobbyist's DW-NOMINATE Score") graphregion(color(white))) ///
       (hist lob_dwmean if lob_republican == 1, freq start(-.60) width(0.05) fcolor(none) lcolor(black)) ///
       , legend(order(1 "Democrat Lobbyist" 2 "Republican Lobbyist") cols(3)) ///
       xlabel(-1(0.5)1) xscale(range(-1 1))

* Remove the intermediate files written by the appendix-table sections and
* close the log.
erase temp_firmchar.dta
erase temp_lobsource.dta

log close
